
Model Validation Project - Backtesting a Historical VaR Model through the 2023 Banking Crisis¶


Objective:¶

To implement and backtest a 99% 1-Day Historical Simulation VaR model, analyze its performance during the March 2023 banking crisis, and connect model failures (breaches) to specific financial news events.


Things to know:¶

  • VaR: Value at Risk (VaR) is a measure used to assess the potential loss in the value of a portfolio over a given time period and at a given confidence level. As John C. Hull aptly explains in his book, "when using the value-at-risk measure, an analyst is interested in making a statement of the following form: 'I am X percent certain there will not be a loss of more than V dollars in the next N days.' The variable V is the VaR of the portfolio. It is a function of two parameters: the time horizon (N days) and the confidence level (X%). It is the loss level over N days that has a probability of only (100 - X)% of being exceeded."

  • How to implement VaR: The most straightforward way to implement a VaR model is Historical Simulation, which uses past data to estimate what may happen in the future (see the small sketch after this list).

  • For this project, a Historical Simulation VaR model is implemented and backtested over a crisis period to understand how the model performs in volatile markets and why it falls short.
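To make the idea concrete, the following is a minimal, self-contained sketch of a 1-day 99% Historical Simulation VaR computed on simulated returns. The random returns, the seed and the variable names are purely illustrative and are not part of the project data.

import numpy as np

# One year (252 trading days) of made-up daily returns, for illustration only
rng = np.random.default_rng(42)
daily_returns = rng.normal(0.0005, 0.01, 252)

confidence_level = 0.99
alpha = 1 - confidence_level

# Historical Simulation VaR: the loss level exceeded on only alpha (here 1%) of past days
var_99 = -np.percentile(daily_returns, 100 * alpha)

portfolio_value = 1_000_000
print(f"1-day 99% VaR: {var_99:.2%} of the portfolio, i.e. about ${var_99 * portfolio_value:,.0f}")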


Project Prompt:¶

You are a junior quant on a model validation team. Your bank uses a basic 99% 1-day Historical Simulation VaR model for preliminary risk monitoring on some of its trading portfolios. In the wake of the March 2023 collapse of Silicon Valley Bank (SVB) and Signature Bank, your manager has asked you to conduct a post-mortem analysis. Your key task is to assess whether the simple VaR model was adequate to capture the risks during the March 2023 banking crisis. Identify every day on which the model failed (a 'breach') and provide a narrative explanation for each failure using evidence from financial news.


The Strategy:¶

The post-mortem analysis of the VaR model can be split into 2 distinct parts as follows:

  1. The Quantitative Analysis - This includes downloading the requisite data, constructing a hypothetical portfolio and implementing the Historical Simulation VaR model on that data. Once the model results are available, the backtest is run over the crisis period and the breaches are visualised.

  2. The News-Driven Analysis - This includes gathering financial news excerpts for the days on which breaches occurred and diving deep into the news to see whether it mentions events that the model could not have anticipated and that therefore led to its failure.


Part 1 - The Quantitative Analysis¶

  • Tools: Python, with the libraries pandas, numpy, yfinance and matplotlib. If these libraries are not installed, run 'pip install yfinance pandas numpy matplotlib' in a terminal or command prompt.

    • pandas: The primary tool for working with structured data in a spreadsheet-like format; its DataFrame object is used throughout this project.
    • numpy: The fundamental package for scientific computing. It is useful for numerical operations, especially calculating the percentile for VaR.
    • yfinance: A convenient library to download historical market data from Yahoo Finance directly into the script.
    • matplotlib.pyplot: The standard library for creating static, animated, and interactive visualizations in Python.
  • Data: Daily stock price data from Yahoo Finance (yfinance).

  • Portfolio: Equally-weighted portfolio with an assumed value of $1,000,000, comprising the following tickers:

    • JPM (JPMorgan Chase - a large, systemic bank)
    • KRE (SPDR S&P Regional Banking ETF - direct exposure to the crisis epicenter)
    • SPY (SPDR S&P 500 ETF - broad market context)
  • Time Period:

    • Lookback Window for VaR Calculation: 252 days (approximately 1 trading year).
    • Backtesting Period: January 1, 2023, to April 30, 2023.

1.1 Importing the necessary libraries¶

When importing the necessary libraries, it is common practice to import them under an alias (observe "import pandas as pd" in the following code excerpt). This makes it easy to call their functions later in the code. These aliases can differ based on personal preference, as long as they are used consistently throughout the document.

In [1]:
# Core libraries for data manipulation and numerical operations
import pandas as pd
import numpy as np

# Library for downloading financial data
import yfinance as yf

# Library for data visualization (only 'pyplot' and 'dates' modules from matplotlib library are being imported here)
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

1.2 Downloading data from Yahoo Finance¶

In [2]:
# Specifying tickers, their weights and portfolio value

tickers = ['JPM', 'KRE', 'SPY']
weights = np.array([1/3, 1/3, 1/3])
portfolio_value = 1_000_000  

# Specifying the dates for lookback window (1 trading year before the backtesting period begins)
start_date = '2022-01-01'
end_date = '2023-04-30'

# Downloading all available data for the tickers
data = yf.download(tickers, start=start_date, end=end_date)
print(data.columns)
/tmp/ipykernel_1840440/2254962435.py:12: FutureWarning: YF.download() has changed argument auto_adjust default to True
  data = yf.download(tickers, start=start_date, end=end_date)
[*********************100%***********************]  3 of 3 completed
MultiIndex([( 'Close', 'JPM'),
            ( 'Close', 'KRE'),
            ( 'Close', 'SPY'),
            (  'High', 'JPM'),
            (  'High', 'KRE'),
            (  'High', 'SPY'),
            (   'Low', 'JPM'),
            (   'Low', 'KRE'),
            (   'Low', 'SPY'),
            (  'Open', 'JPM'),
            (  'Open', 'KRE'),
            (  'Open', 'SPY'),
            ('Volume', 'JPM'),
            ('Volume', 'KRE'),
            ('Volume', 'SPY')],
           names=['Price', 'Ticker'])

Note: The adjusted close price (published by Yahoo Finance under the column heading "Adj Close") reflects the stock's closing price after adjusting for corporate actions like:

  • Dividends

  • Stock splits

  • Rights offerings

It is primarily used for backtesting and historical analysis because it gives a consistent basis for comparison across time. However, as can be observed from the printed column headings above, yfinance did not return an 'Adj Close' column for this download. This is because recent versions of yfinance default to auto_adjust=True (see the FutureWarning in the output above), which applies dividend and split adjustments directly to the 'Close' column and drops the separate 'Adj Close' column.

Therefore, since no separate 'Adj Close' column is available, this project uses the 'Close' price, which is already adjusted under this default.
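If both the raw close and a separate 'Adj Close' column are needed, auto_adjust can be set to False explicitly. The snippet below is only a minimal sketch of that option (the exact behaviour depends on the installed yfinance version) and is not required for the rest of this project.

# Requesting unadjusted prices explicitly; this should return both 'Close' and 'Adj Close' columns
raw_data = yf.download(tickers, start=start_date, end=end_date, auto_adjust=False)
print(raw_data.columns.get_level_values('Price').unique())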

1.3 Calculating log-returns of the data¶

Log returns measure the change in price from one time period to the next on a logarithmic scale (the natural logarithm of the ratio of consecutive prices). They are preferred in quantitative finance, especially in backtesting and portfolio analysis, because log returns, unlike simple returns, are closer to normally distributed and are time-additive.

Time-additivity means that log returns simply add up over time, as observed below:

$$ \ln\left(\frac{P_3}{P_1}\right) = \ln\left(\frac{P_2}{P_1} \cdot \frac{P_3}{P_2}\right) = \ln\left(\frac{P_2}{P_1}\right) + \ln\left(\frac{P_3}{P_2}\right) $$

$$ \Rightarrow r_{1,3} = r_{1,2} + r_{2,3} $$

This is not true of simple returns.
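The identity above is easy to verify numerically. The following is a small illustrative check with made-up prices (not part of the project data):

import numpy as np

prices = np.array([100.0, 103.0, 99.5])              # P1, P2, P3 (made up)

# Daily log returns add up to the two-day log return
step_log_returns = np.diff(np.log(prices))            # r_(1,2), r_(2,3)
print(step_log_returns.sum(), np.log(prices[-1] / prices[0]))

# Daily simple returns do not add up to the two-day simple return
step_simple_returns = np.diff(prices) / prices[:-1]
print(step_simple_returns.sum(), prices[-1] / prices[0] - 1)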

In [3]:
# Calculating log returns using the 'Close' price
close_prices = data['Close']
log_returns = np.log(1 + close_prices.pct_change())

# Calculating the weighted average of the individual asset returns
portfolio_returns = log_returns.dot(weights)

# Drop the first row which is NaN (since there's no previous day to calculate a return)
portfolio_returns = portfolio_returns.dropna()
portfolio_returns.name = 'Portfolio Return'

print("Data downloaded and prepared.")
print(f"Portfolio returns from {portfolio_returns.index.min().date()} to {portfolio_returns.index.max().date()}")
print("\nSample of Close Prices DataFrame:")
print(close_prices.head())
print("\nSample of Final Portfolio Returns Series:")
print(portfolio_returns.head())
Data downloaded and prepared.
Portfolio returns from 2022-01-04 to 2023-04-28

Sample of Close Prices DataFrame:
Ticker             JPM        KRE         SPY
Date                                         
2022-01-03  146.291061  65.188011  454.466919
2022-01-04  151.836884  67.030655  454.314667
2022-01-05  149.060974  66.597115  445.590851
2022-01-06  150.644638  69.117195  445.172272
2022-01-07  152.137238  69.794617  443.412323

Sample of Final Portfolio Returns Series:
Date
2022-01-04    0.021583
2022-01-05   -0.014776
2022-01-06    0.015590
2022-01-07    0.005217
2022-01-10   -0.000614
Name: Portfolio Return, dtype: float64

1.4 Implementing a rolling backtest for VaR¶

Notes:

  • Since the look-back period is 1 year, the number of trading days therein would be 252.
  • In the context of VaR, α (alpha) represents the probability of a loss exceeding the VaR threshold, i.e. the left-tail area under the return distribution. It is calculated as: $$ \alpha = 1 - \text{confidence level} $$
  • For each day, the VaR threshold is calculated from the returns of the 252 days preceding the current day. This is done by slicing the historical window in each iteration of the loop.
  • The VaR threshold is the value below which 1% of the returns in the historical window fall (since the confidence level is 99%). It is standard convention to multiply the resulting number by -1 so that the loss is expressed as a positive number.
  • In each iteration, the calculated VaR and the actual return, together with the corresponding date, are appended to a results list, which is converted into a DataFrame after the loop for further analysis.
In [4]:
# Specifying the backtesting parameters
lookback_days = 252  
confidence_level = 0.99
alpha = 1 - confidence_level         

# Initiating a list to store the results
results = []

# Iterating over the backtest days; the loop starts at index 252 rather than 0 so that a full 252 days of prior data are available from the very first iteration
for i in range(lookback_days, len(portfolio_returns)):
    
    # Creating a slice of the last 252 returns before the current day
    historical_window = portfolio_returns.iloc[i - lookback_days : i]
    
    # Calculating VaR
    var_99 = -np.percentile(historical_window, 100 * alpha)
    
    # Fetching the actual return for the current day 
    actual_return = portfolio_returns.iloc[i]
    
    # Appending the results (date, VaR, actual return)
    results.append({
        'Date': portfolio_returns.index[i],
        'VaR_99': var_99,
        'Actual_Return': actual_return
    })

# Converting the list of results into a pandas DataFrame
results_df = pd.DataFrame(results)
results_df.set_index('Date', inplace=True)

print("Backtest complete. Results DataFrame created.","\n")
print(results_df.head())
Backtest complete. Results DataFrame created. 

              VaR_99  Actual_Return
Date                               
2023-01-05  0.036237      -0.010276
2023-01-06  0.036237       0.023506
2023-01-09  0.036237      -0.004598
2023-01-10  0.036237       0.007827
2023-01-11  0.036237       0.008498

1.5 Identifying the breaches¶

  • A breach occurs when the actual loss on a given day exceeds the predicted VaR. Since the VaR and the actual return for each day are already available in a DataFrame, all that remains is to compare the two values and find the days on which the actual return is more negative than the -VaR value.

  • The number of breaches and the breach rate are calculated along with the expected number of breaches (Total days × α). If the actual number of breaches is significantly higher than expected, it is a strong sign that the model is underestimating risk. In this case, the actual number of breaches will be higher due to the presence of a crisis.

  • To count the breaches, .sum() is used: in Python, True equals 1 and False equals 0, so .sum() adds up the number of True values, i.e. the number of breaches.

In [5]:
# Comparing the values and creating a boolean 'Breach' column which is True if the actual loss exceeded the predicted VaR and False otherwise
results_df['Breach'] = results_df['Actual_Return'] < -results_df['VaR_99']

# Calculating and printing the results
num_breaches = results_df['Breach'].sum()
total_days = len(results_df)
breach_rate = num_breaches / total_days

print(f"Backtesting Period: {results_df.index.min().date()} to {results_df.index.max().date()}")
print(f"Total Trading Days: {total_days}")
print(f"Number of Breaches: {num_breaches}")
print(f"Breach Rate: {breach_rate:.2%}")
print(f"Expected Breaches at 99% Confidence: {total_days * alpha:.2f}")

# Filtering only the rows where 'Breach' is True
breach_days = results_df[results_df['Breach']]

# Displaying the specific days where the model failed
print("\nBreach Days: ")
print(breach_days)
Backtesting Period: 2023-01-05 to 2023-04-28
Total Trading Days: 79
Number of Breaches: 3
Breach Rate: 3.80%
Expected Breaches at 99% Confidence: 0.79

Breach Days: 
              VaR_99  Actual_Return  Breach
Date                                       
2023-03-09  0.033380      -0.052956    True
2023-03-13  0.034967      -0.050304    True
2023-03-17  0.036566      -0.037364    True
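To put the breach count into perspective, the probability of seeing at least this many breaches under a correctly calibrated 99% model can be computed with a simple binomial test. This is an optional check, not part of the original workflow, and it uses scipy, an additional dependency not listed among the tools above.

from scipy.stats import binom

# P(X >= num_breaches) when each day is an independent breach with probability alpha
p_value = binom.sf(num_breaches - 1, total_days, alpha)
print(f"Probability of {num_breaches} or more breaches in {total_days} days "
      f"under a well-calibrated 99% model: {p_value:.2%}")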

1.6 Visualizing the results¶

In the graph, actual returns are plotted against the VaR estimates, and breaches are highlighted for a comprehensive presentation.

In [6]:
# Specifying the figure size
plt.figure(figsize=(15, 7))

# Plotting Actual Returns and VaR
plt.plot(results_df.index, results_df['Actual_Return'], 'b', label='Actual Portfolio Return')
plt.plot(results_df.index, -results_df['VaR_99'], 'r--', label='-VaR (99%)')

# Marking the breach days
breach_dates = breach_days.index
breach_values = breach_days['Actual_Return']
plt.scatter(breach_dates, breach_values, color='r', marker='o', s=100, label='Breach Event')

# Formatting the plot
plt.title('VaR Backtest: Portfolio Returns vs. 99% Historical VaR', fontsize=16)
plt.xlabel('Date', fontsize=12)
plt.ylabel('Daily Return', fontsize=12)
plt.legend()
plt.tight_layout()

# Displaying the plot
plt.show()
[Figure: VaR Backtest plot showing daily portfolio returns against the -99% VaR line, with the three breach days marked]

Note:¶

This sums up the quantitative analysis for this project. The code has provided the "what" and the "when" parts of the story. The next step is to search for the "why". The data have pinpointed the exact days on which breaches occurred, and these are the days investigated further in the qualitative analysis part of this project.


Part 2 - The Qualitative News Analysis¶

  • Tools: Python, with the additional libraries os (standard library), nltk, requests and google-generativeai. If the third-party libraries are not installed, run 'pip install requests nltk google-generativeai' in a terminal or command prompt.

    • requests: A simple yet powerful library for making HTTP requests to websites and APIs. This is how the script will "talk" to the NewsAPI.
    • nltk: The Natural Language Toolkit — A comprehensive library for working with human language data. It is used here for sentiment analysis.
    • google-generativeai: The official Python library provided by Google to interact with their generative AI models, like Gemini.
    • nltk.download(...): The NLTK library is modular. The sentiment analysis tool that will be used - VADER - requires a pre-trained dictionary of words and their sentiment scores (the "lexicon"). This command downloads that specific data.

2.1 Importing necessary libraries and modules¶

In [7]:
# Importing the libraries for data analysis, sending API requests and interacting with AI models
import pandas as pd
import requests
import os
from datetime import datetime
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import google.generativeai as genai

# Downloading the lexicon for VADER
nltk.download('vader_lexicon')
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /home/latika/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
Out[7]:
True

2.2 API Key configuration¶

  • API keys are like passwords for programs — they are unique codes used to identify and authenticate a user or application trying to access an API (Application Programming Interface). It's essentially a secret code that allows applications to interact with another application's features or data.
  • For this project, two API keys are required:
    • MarketAux API Key - To access financial news data from the MarketAux website. It can be obtained by registering on https://www.marketaux.com/ > Get FREE API Key
    • Gemini API Key - To interact with Google's AI model. It can be obtained by signing in https://aistudio.google.com/prompts/new_chat with Google account > Get API Key > Create API Key.
  • As a safety measure, the API keys have been replaced by placeholders here.
In [ ]:
# Replace the placeholder text with actual API keys.
MARKETAUX_API_KEY = "YOUR MARKETAUX API KEY HERE"
GEMINI_API_KEY = "YOUR GOOGLE AI STUDIO API KEY HERE"
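As an alternative to pasting keys directly into the notebook, they can be read from environment variables. The snippet below is only a sketch of that approach; the environment variable names used here are assumptions, not part of the original setup.

import os  # already imported in section 2.1

# Reading the keys from environment variables, falling back to the placeholders if they are not set
MARKETAUX_API_KEY = os.environ.get("MARKETAUX_API_KEY", "YOUR MARKETAUX API KEY HERE")
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", "YOUR GOOGLE AI STUDIO API KEY HERE")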

2.2.1 Pro-Active debugging Step¶

The following action points came up during debugging and are mentioned here to help the user avoid similar issues when implementing this project.

  • During the initial few runs, the program ran into multiple errors when trying to interact with Google's AI model, caused by differences in the versions of the models available for use.
  • This step lists all models available for the given API key, so the user can proactively choose which one to use for the project.
  • It was also helpful to read the API documentation, since it revealed that some of the models, even though listed here, were being deprecated.
  • It is also important to be mindful of rate limits. The RPM (Requests Per Minute) limit exists to ensure fair usage (e.g., perhaps 15 requests per minute). If the code runs a for loop with one API request per iteration, it may fire several requests almost instantly. The easy fix is a time delay inside the loop, as sketched below. In this project, the gemini-2.5-flash model is used and a delay of 15 seconds was deliberately added to respect that model's RPM limit.
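The arithmetic behind the delay is simple: an RPM limit of R allows at most one request every 60 / R seconds. The figure below uses the 15 RPM example from the bullet above, not a documented quota for any particular model.

# Minimum spacing implied by an example RPM limit; the project sleeps 15 seconds per iteration for extra headroom
requests_per_minute_limit = 15
min_delay_seconds = 60 / requests_per_minute_limit
print(f"At {requests_per_minute_limit} RPM, requests must be spaced at least {min_delay_seconds:.0f} seconds apart.")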
In [15]:
# This is a proactive step to be aware of all the models that can be accessed using the given API Key.
print("--- Available Gemini Models ---")
try:
    genai.configure(api_key=GEMINI_API_KEY)
    for m in genai.list_models():
      if 'generateContent' in m.supported_generation_methods:
        print(m.name)
except Exception as e:
    print(f"Could not list models. Error: {e}")
--- Available Gemini Models ---
models/gemini-1.0-pro-vision-latest
models/gemini-pro-vision
models/gemini-1.5-pro-latest
models/gemini-1.5-pro-002
models/gemini-1.5-pro
models/gemini-1.5-flash-latest
models/gemini-1.5-flash
models/gemini-1.5-flash-002
models/gemini-1.5-flash-8b
models/gemini-1.5-flash-8b-001
models/gemini-1.5-flash-8b-latest
models/gemini-2.5-pro-preview-03-25
models/gemini-2.5-flash-preview-04-17
models/gemini-2.5-flash-preview-05-20
models/gemini-2.5-flash
models/gemini-2.5-flash-preview-04-17-thinking
models/gemini-2.5-flash-lite-preview-06-17
models/gemini-2.5-pro-preview-05-06
models/gemini-2.5-pro-preview-06-05
models/gemini-2.5-pro
models/gemini-2.0-flash-exp
models/gemini-2.0-flash
models/gemini-2.0-flash-001
models/gemini-2.0-flash-exp-image-generation
models/gemini-2.0-flash-lite-001
models/gemini-2.0-flash-lite
models/gemini-2.0-flash-preview-image-generation
models/gemini-2.0-flash-lite-preview-02-05
models/gemini-2.0-flash-lite-preview
models/gemini-2.0-pro-exp
models/gemini-2.0-pro-exp-02-05
models/gemini-exp-1206
models/gemini-2.0-flash-thinking-exp-01-21
models/gemini-2.0-flash-thinking-exp
models/gemini-2.0-flash-thinking-exp-1219
models/gemini-2.5-flash-preview-tts
models/gemini-2.5-pro-preview-tts
models/learnlm-2.0-flash-experimental
models/gemma-3-1b-it
models/gemma-3-4b-it
models/gemma-3-12b-it
models/gemma-3-27b-it
models/gemma-3n-e4b-it
models/gemma-3n-e2b-it

2.3 Defining functions¶

This part includes the reusable functions that will be helpful in doing the news analysis.

2.3.1 get_financial_news(...):¶

  • Purpose: This function's sole job is to fetch news headlines for a given date and set of keywords.
  • URL Construction: It builds the specific web address (URL) to send to the MarketAux API. Each part of the URL is a parameter that refines the search (e.g., the search query, the 'published on' date, language=en for English, and the limit). It was useful to read the API documentation to learn how best to construct a search query and to be aware of the limit for the specific plan being used.
  • try...except Block: This is for error handling. The internet isn't always reliable. If the request fails (e.g., no internet connection, API server is down), this block "catches" the error and prints a helpful message instead of crashing the script.
  • JSON Parsing: APIs typically return data in a format called JSON. response.json() converts this raw text data into a Python dictionary, which is easy to work with. .get('data', []) is a safe way to pull out the list of articles, returning an empty list if none are found.
  • Return Value: It returns a simple Python list of headline strings.
In [17]:
# Defining the function to Fetch News 
def get_financial_news(date_str: str, keywords: list) -> list:
    """Fetches financial news headlines for a specific date from MarketAux API."""
    
    search_query = " | ".join(f'"{term}"' for term in keywords)
    url = (f"https://api.marketaux.com/v1/news/all?"
           f"search={search_query}"
           f"&language=en"
           f"&published_on={date_str}"
           f"&limit=3"  
           f"&api_token={MARKETAUX_API_KEY}")
    
    # Using a try-except block to keep the program from crashing entirely
    try:
        response = requests.get(url)
        response.raise_for_status()
        
        # Extracting the data from JSON format
        articles = response.json().get('data', [])
        return [article['title'] for article in articles]
    
    except requests.exceptions.RequestException as e:
        print(f"Error fetching news from MarketAux: {e}")
        return []
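A quick usage illustration of the function above. The date and keywords are examples; a valid MARKETAUX_API_KEY and network access are required, and the headlines returned will depend on the plan and the API's news index.

# Illustrative call; returns a (possibly empty) list of headline strings
sample_headlines = get_financial_news("2023-03-09", ["Silicon Valley Bank", "KRE"])
print(sample_headlines)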

2.3.2 get_sentiment_score(...):¶

  • Purpose: To calculate a single number representing the overall "mood" of the news.
  • VADER: It uses NLTK's SentimentIntensityAnalyzer. VADER is great for short texts like headlines because it's tuned to social media language and common expressions.
  • Compound Score: For each headline, VADER calculates a compound score ranging from -1 (extremely negative) to +1 (extremely positive), with 0 being neutral.
  • Return Value: The function returns the average compound score of all headlines for that day, giving a single, comparable metric.
In [11]:
# Defining the function for Sentiment Analysis 
def get_sentiment_score(headlines: list) -> float:
    """Performs sentiment analysis on headlines and returns the average compound score."""
    
    if not headlines: return 0.0
    sia = SentimentIntensityAnalyzer()
    
    # Calculating the compound score for each headline and summing them to compute the average compound score
    total_compound_score = sum(sia.polarity_scores(headline)['compound'] for headline in headlines)
    avg_compound_score = total_compound_score / len(headlines)
    return avg_compound_score
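A quick usage illustration of the sentiment function. The headline is made up (not project data); the exact score depends on the VADER lexicon version, but alarming language like this is expected to score negative.

# Illustrative call with a made-up headline
example_headlines = ["Bank stocks crater as lender faces sudden liquidity crisis"]
print(get_sentiment_score(example_headlines))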

2.3.3 generate_news_summary(...):¶

  • Purpose: This is the most advanced function and acts as the AI financial analyst.
  • Prompt Engineering: This is the art of giving clear instructions to an AI. A detailed "prompt" is required to tell the Gemini model:
    • Its Role: "You are a professional financial analyst..."
    • The Context: The exact date, the VaR prediction, and the actual loss.
    • The Raw Data: The list of headlines that were fetched.
    • Its Task: A very specific instruction to "synthesize the information into a coherent narrative" and "explain the 'why'." This is crucial for getting a high-quality, analytical summary instead of a simple list.
  • API Call: model.generate_content(prompt) sends the entire set of instructions to the Gemini API, which processes it and generates the text.
In [18]:
# Defining the function to Generate AI Summary 
def generate_news_summary(headlines: list, date_str: str, var_value: float, actual_loss: float) -> str:
    """Uses Google's Gemini LLM to generate a 'News-Driven Cause' summary."""
    
    if not headlines: return "No news headlines were found for this date to generate a summary."

    # Formatting the headlines into a clean, bulleted list for the AI
    headline_str = "\n".join(f"- {h}" for h in headlines)
    
    # Specifying a detailed instruction set (the "prompt") for the AI
    prompt = f"""
    You are a professional financial analyst writing a risk report. Your task is to explain why a significant market loss occurred on a specific date, based on the news headlines from that day.

    **Context:**
    - **Date of Breach:** {date_str}
    - **Predicted Max Loss (VaR):** {var_value:.2f}%
    - **Actual Loss:** {actual_loss:.2f}%

    **News Headlines from that Day:**
    {headline_str}

    **Your Task:**
    Analyze these headlines and write a concise, one-paragraph explanation (the "News-Driven Cause") for the market drop. Synthesize the information into a coherent narrative. Focus on the most impactful events and entities (like SVB, Credit Suisse, etc.) if they are prominent. Explain the 'why'.
    """
    
    # Enclosing the API call in a try-except block to avoid crashing the entire program
    try:
        model = genai.GenerativeModel('gemini-2.5-flash')
        response = model.generate_content(prompt)
        return response.text.strip()
    except Exception as e:
        return f"An error occurred while generating the summary with the LLM: {e}"

2.4 Running the automated analysis¶

  • The breach_days DataFrame that was created in Part 1 can now be used to perform the analysis.
  • Data Extraction: Inside the loop, the date is pulled out and converted into the YYYY-MM-DD string format that the API requires.
  • search_keywords: For the SVB crisis, terms like 'SVB', 'KRE', and 'banking crisis' are highly relevant and are therefore specified in this list. The loop then prints the final, formatted report for each breach day it finds.
In [13]:
# Importing the 'time' library to add pauses
import time

# Loop through each row in the `breach_days` DataFrame
for index, row in breach_days.iterrows():
    
    # Extracting the data from the row. The date is the index, so `row.name` is used.
    breach_date_dt = row.name
    breach_date_str = breach_date_dt.strftime('%Y-%m-%d')
    var_value = row['VaR_99'] * 100
    actual_loss = abs(row['Actual_Return']) * 100

    # Print a header for the current day's analysis
    print("==========================================================")
    print(f"Analyzing Breach on: {breach_date_dt.strftime('%B %d, %Y')}")
    print("==========================================================")

    # Defining keywords and fetching news
    search_keywords = ["SIVB", "SBNY", "FRC", "CS", "KRE", "banking crisis", "bank run", "contagion", "Silicon Valley Bank", "selling bonds", "S&P"]
    headlines = get_financial_news(breach_date_str, search_keywords)
    
    if not headlines:
        print("No relevant news found for the given keywords on this date.\n")
        # Skip to the next date in the loop
        continue 
        
    print(f"Found {len(headlines)} relevant headlines.")

    # Calculating the sentiment Score
    sentiment_score = get_sentiment_score(headlines)
    
    # Generating summary with LLM
    news_cause = generate_news_summary(headlines, breach_date_str, var_value, actual_loss)

    # Print the final, formatted output for this day's report
    print("\n*** Model Result & News-Driven Cause ***\n")
    print(f"*   **Model Result:** VaR predicted a max loss of {var_value:.2f}%; the actual loss was {actual_loss:.2f}%.")
    print(f"*   **News Sentiment Score:** {sentiment_score:.3f} (A score below -0.1 is typically negative)")
    print(f"*   **News-Driven Cause:** {news_cause}")
    print("\n")
    
    # Adding a short pause to respect the API's rate limit (requests per minute). This prevents the 429 "Too Many Requests" error.
    time.sleep(15)
==========================================================
Analyzing Breach on: March 09, 2023
==========================================================
Found 3 relevant headlines.

*** Model Result & News-Driven Cause ***

*   **Model Result:** VaR predicted a max loss of 3.34%; the actual loss was 5.30%.
*   **News Sentiment Score:** -0.507 (A score below -0.1 is typically negative)
*   **News-Driven Cause:** On March 9, 2023, the market experienced a significant downturn, with actual losses exceeding the predicted VaR, primarily due to an acute crisis of confidence in the banking sector. News headlines detailed Silicon Valley Bank's (SVB) "sudden liquidity crisis," which resulted in a "record 60% crash" in its stock and caused "bank stocks [to] crater" broadly. This severe distress at SVB sent a stark "warning sign" across the financial system, leading to widespread contagion and fear, evidenced by other institutions like Signature Bank experiencing stock declines despite claims of financial strength. The rapid and systemic erosion of trust in banks, triggered by SVB's woes, explains the amplified market losses beyond expectations.


==========================================================
Analyzing Breach on: March 13, 2023
==========================================================
Found 3 relevant headlines.

*** Model Result & News-Driven Cause ***

*   **Model Result:** VaR predicted a max loss of 3.50%; the actual loss was 5.03%.
*   **News Sentiment Score:** -0.640 (A score below -0.1 is typically negative)
*   **News-Driven Cause:** The significant market loss on March 13, 2023, which exceeded the predicted VaR, was primarily driven by a sharp resurgence in regional bank contagion fears, despite initial signs of fading anxiety from the Silicon Valley Bank (SVB) crash. While US stock futures initially rose on hopes of containing the SVB fallout, this optimism was quickly overwhelmed by the dramatic 70% plunge in First Republic Bank (FRC) shares. This severe decline, compounded by an analyst downgrade from RayJay highlighting downside risks to FRC's earnings, signaled that the systemic "regional bank worry" had not abated but rather intensified and spread, leading to a broader market sell-off that pushed actual losses beyond expected limits.


==========================================================
Analyzing Breach on: March 17, 2023
==========================================================
Found 3 relevant headlines.

*** Model Result & News-Driven Cause ***

*   **Model Result:** VaR predicted a max loss of 3.66%; the actual loss was 3.74%.
*   **News Sentiment Score:** -0.412 (A score below -0.1 is typically negative)
*   **News-Driven Cause:** On March 17, 2023, the actual market loss of 3.74% notably exceeded the predicted Value at Risk (VaR) of 3.66%, primarily driven by widespread and escalating anxieties within the financial sector. News headlines from the day reflected a profound crisis, articulating that "Banks in Danger" and directly questioning if Wall Street analysts were "ignoring the banking collapse." This pervasive sense of systemic vulnerability, amplified by reports of "Short Sellers Post Profits of $3.5 Billion on Banks’ Woes," signaled a significant erosion of investor confidence in the stability of financial institutions, leading to a broad market sell-off as concerns over contagion and underlying weakness permeated sentiment.


Concluding remarks:¶

Looking at the news analyses, the sentiment scores for all three breach days are clearly negative. Further, the LLM-driven summaries of the financial news reveal that the market was responding to an unprecedented sequence of events. The Historical Simulation VaR model failed because it is inherently backward-looking: its risk estimate on March 9th was based on the relatively calm preceding year and contained no information about the possibility of a sudden bank run that would dominate the headlines for days to come.

This exercise demonstrates the critical need for forward-looking risk tools such as Stress Testing and Scenario Analysis to supplement daily VaR models.